Grafici andamento Covid-19

Data e Ora ultimo aggiornamento

In [1]:
import datetime

# Timestamp of this notebook run, printed so readers can tell how fresh the figures are.
run_timestamp = datetime.datetime.today()
print(run_timestamp)
2020-10-30 21:08:04.933439
In [2]:
from IPython.display import HTML

# Render a small script plus a submit button that toggle the visibility of all
# `div.input` elements. `code_toggle` is also registered to run once when the
# document is ready; since `code_show` starts as true, that first call hides them.
# NOTE(review): the script uses `$`, so it presumably relies on jQuery being
# provided by the page that renders the notebook — confirm for your front end.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Clicca qui per nascondere / mostrare il codice"></form>''')
Out[2]:
In [3]:
import pandas as pd
import numpy as np
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px


import warnings
warnings.filterwarnings('ignore')
In [4]:
# CSV feeds from the pcm-dpc/COVID-19 GitHub repository:
# regional, provincial and national series respectively.
url_r = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv"
url_p = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-province/dpc-covid19-ita-province.csv"
url_n = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv"

# Each feed is read with pandas' default parsing, in the order region, province, national.
# For a quick sanity check, inspect .dtypes, .isnull().sum(), .shape or .head() on any frame.
data_region, data_province, data_national = (
    pd.read_csv(csv_url) for csv_url in (url_r, url_p, url_n)
)

Tabella dei dati degli ultimi giorni

In [5]:
# Day-over-day increments of the cumulative national counters
# (new column name -> cumulative source column).
daily_increment_sources = {
    'new_cases': 'totale_casi',
    'new_deaths': 'deceduti',
    'new_recovered': 'dimessi_guariti',
    'new_swabs': 'tamponi',
    'new_unique_tested': 'casi_testati',
}
for increment_col, cumulative_col in daily_increment_sources.items():
    data_national[increment_col] = data_national[cumulative_col].diff()

# Percentage change of each daily increment versus the previous row, rounded to 2 decimals.
percent_change_sources = {
    'daily_cases_perc_change': 'new_cases',
    'daily_swab_perc_change': 'new_swabs',
    'daily_unique_tested_perc_change': 'new_unique_tested',
}
for change_col, increment_col in percent_change_sources.items():
    data_national[change_col] = data_national[increment_col].pct_change(1).mul(100).round(2)

# Detection ratio: daily new cases as a percentage of daily swabs / daily unique people tested.
national_new_cases = data_national['new_cases']
data_national['detect_ratio_swabs'] = national_new_cases.div(data_national['new_swabs']).mul(100).round(2)
data_national['detect_ratio_cases'] = national_new_cases.div(data_national['new_unique_tested']).mul(100).round(2)

data_national.tail(10)
Out[5]:
data stato ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare totale_positivi variazione_totale_positivi nuovi_positivi dimessi_guariti ... new_cases new_deaths new_recovered new_swabs new_unique_tested daily_cases_perc_change daily_swab_perc_change daily_unique_tested_perc_change detect_ratio_swabs detect_ratio_cases
240 2020-10-21T17:00:00 ITA 9057 926 9983 145459 155442 12703 15199 257374 ... 15199.0 127.0 2369.0 177848.0 106488.0 39.81 22.88 21.45 8.55 14.27
241 2020-10-22T17:00:00 ITA 9694 992 10686 158616 169302 13860 16079 259456 ... 16078.0 136.0 2082.0 170392.0 104872.0 5.78 -4.19 -1.52 9.44 15.33
242 2020-10-23T17:00:00 ITA 10549 1049 11598 174404 186002 16700 19143 261808 ... 19143.0 91.0 2352.0 182032.0 114499.0 19.06 6.83 9.18 10.52 16.72
243 2020-10-24T17:00:00 ITA 11287 1128 12415 190767 203182 17180 19644 264117 ... 19640.0 151.0 2309.0 177669.0 109673.0 2.60 -2.40 -4.21 11.05 17.91
244 2020-10-25T17:00:00 ITA 12006 1208 13214 209027 222241 19059 21273 266203 ... 21273.0 128.0 2086.0 161880.0 101876.0 8.31 -8.89 -7.11 13.14 20.88
245 2020-10-26T17:00:00 ITA 12997 1284 14281 222403 236684 14443 17012 268626 ... 17007.0 141.0 2423.0 124686.0 78816.0 -20.05 -22.98 -22.64 13.64 21.58
246 2020-10-27T17:00:00 ITA 13955 1411 15366 239724 255090 18406 21994 271988 ... 21989.0 221.0 3362.0 174398.0 106346.0 29.29 39.87 34.93 12.61 20.68
247 2020-10-28T17:00:00 ITA 14981 1536 16517 259940 276457 21367 24991 275404 ... 24988.0 205.0 3416.0 198952.0 121820.0 13.64 14.08 14.55 12.56 20.51
248 2020-10-29T17:00:00 ITA 15964 1651 17615 281576 299191 22734 26831 279282 ... 26829.0 217.0 3878.0 201452.0 118857.0 7.37 1.26 -2.43 13.32 22.57
249 2020-10-30T17:00:00 ITA 16994 1746 18740 307046 325786 26595 31084 283567 ... 31079.0 199.0 4285.0 215085.0 129688.0 15.84 6.77 9.11 14.45 23.96

10 rows × 27 columns

In [6]:
#regional data preparation

def _rows_for_region(region_name):
    """Return an independent copy of the `data_region` rows for one region.

    The copy matters because the per-region frames get new columns assigned
    later; working on an explicit copy avoids pandas' SettingWithCopyWarning
    instead of relying on warnings being silenced globally.
    """
    return data_region[data_region['denominazione_regione'] == region_name].copy()


data_region_Abruzzo = _rows_for_region('Abruzzo')
data_region_Basilicata = _rows_for_region('Basilicata')
data_region_Bolzano = _rows_for_region('P.A. Bolzano')
data_region_Calabria = _rows_for_region('Calabria')
data_region_Campania = _rows_for_region('Campania')
data_region_EmiliaR = _rows_for_region('Emilia-Romagna')
data_region_Friuli = _rows_for_region('Friuli Venezia Giulia')
data_region_Lazio = _rows_for_region('Lazio')
data_region_Liguria = _rows_for_region('Liguria')
data_region_Lombardia = _rows_for_region('Lombardia')
data_region_Marche = _rows_for_region('Marche')
data_region_Molise = _rows_for_region('Molise')
data_region_Piemonte = _rows_for_region('Piemonte')
data_region_Puglia = _rows_for_region('Puglia')
data_region_Sardegna = _rows_for_region('Sardegna')
data_region_Sicilia = _rows_for_region('Sicilia')
data_region_Toscana = _rows_for_region('Toscana')
data_region_Trento = _rows_for_region('P.A. Trento')
data_region_Umbria = _rows_for_region('Umbria')
data_region_VAosta = _rows_for_region("Valle d'Aosta")
data_region_Veneto = _rows_for_region('Veneto')

def region_apply(region):
    """Add daily-increment, percentage-change and detection-ratio columns in place.

    Parameters
    ----------
    region : iterable of pandas.DataFrame
        Frames that contain the cumulative columns ``totale_casi``,
        ``deceduti``, ``dimessi_guariti`` and ``tamponi``. Every frame in the
        iterable is modified in place.
        NOTE(review): the row-to-row differences assume the rows are already
        in chronological order — confirm for the frames passed in.

    Returns
    -------
    None
    """
    for frame in region:
        # Row-to-row increments of the cumulative counters.
        frame['new_cases'] = frame['totale_casi'].diff()
        frame['new_deaths'] = frame['deceduti'].diff()
        frame['new_recovered'] = frame['dimessi_guariti'].diff()
        frame['new_swabs'] = frame['tamponi'].diff()
        # Percentage change of the daily increments versus the previous row.
        frame['daily_cases_perc_change'] = round((frame['new_cases'].pct_change(1))*100, 2)
        frame['daily_swab_perc_change'] = round((frame['new_swabs'].pct_change(1))*100, 2)
        # Detection ratio: daily new cases as a percentage of daily swabs.
        frame['detect_ratio'] = round((frame['new_cases'] / frame['new_swabs'])*100, 2)
    # The return used to sit inside the loop, so only the first frame was ever processed.
    return

# Add the derived daily columns to every per-region frame.
# Umbria is included here: it used to be skipped, leaving data_region_Umbria
# without the derived columns. One call per frame keeps each call self-contained.
for regional_frame in (
    data_region_Abruzzo,
    data_region_Basilicata,
    data_region_Bolzano,
    data_region_Calabria,
    data_region_Campania,
    data_region_EmiliaR,
    data_region_Friuli,
    data_region_Lazio,
    data_region_Liguria,
    data_region_Lombardia,
    data_region_Marche,
    data_region_Molise,
    data_region_Piemonte,
    data_region_Puglia,
    data_region_Sardegna,
    data_region_Sicilia,
    data_region_Toscana,
    data_region_Trento,
    data_region_Umbria,
    data_region_VAosta,
    data_region_Veneto,
):
    region_apply([regional_frame])
In [7]:
def _macro_area_daily_totals(area_rows):
    """Sum the regional rows of one macro-area per reporting date and add the derived columns.

    Only numeric columns are summed. `numeric_only=True` is passed explicitly so the
    result does not depend on the pandas version's default handling of text columns.
    Note that identifier/coordinate columns (codice_regione, lat, long) are summed
    too and carry no meaning in the result.
    """
    totals = area_rows.groupby('data').sum(numeric_only=True)
    region_apply([totals])
    # Expose the group key as a regular column as well, for use as the plot x-axis.
    totals['data'] = totals.index
    return totals


# Regional rows grouped into the four macro-areas.
data_region_Nordovest = data_region[(data_region.denominazione_regione.isin(['Piemonte', 'Lombardia', 'Liguria',"Valle d'Aosta"]))]
data_region_Nordest = data_region[(data_region.denominazione_regione.isin(['Emilia-Romagna', 'P.A. Bolzano', 'P.A. Trento', 'Veneto', 'Friuli Venezia Giulia']))]
data_region_Centro = data_region[(data_region.denominazione_regione.isin(['Toscana', 'Umbria', 'Marche', 'Lazio']))]
data_region_Sudisole = data_region[(data_region.denominazione_regione.isin(['Abruzzo', 'Molise', 'Campania', 'Puglia', 'Basilicata', 'Calabria', 'Sicilia', 'Sardegna']))]

cases_Nordovest = _macro_area_daily_totals(data_region_Nordovest)
cases_Nordest = _macro_area_daily_totals(data_region_Nordest)
cases_Centro = _macro_area_daily_totals(data_region_Centro)
cases_Sudisole = _macro_area_daily_totals(data_region_Sudisole)

cases_Nordovest.tail(5)
Out[7]:
codice_regione lat long ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare totale_positivi variazione_totale_positivi nuovi_positivi ... tamponi casi_testati new_cases new_deaths new_recovered new_swabs daily_cases_perc_change daily_swab_perc_change detect_ratio data
data
2020-10-26T17:00:00 13 180.689065 33.123883 5171 392 5563 73787 79350 4273 5758 ... 4143159 2525578.0 5758.0 45.0 1440.0 35715.0 -34.59 -31.89 16.12 2020-10-26T17:00:00
2020-10-27T17:00:00 13 180.689065 33.123883 5667 444 6111 80337 86448 7098 8677 ... 4192520 2558115.0 8677.0 87.0 1492.0 49361.0 50.69 38.21 17.58 2020-10-27T17:00:00
2020-10-28T17:00:00 13 180.689065 33.123883 6294 480 6774 89327 96101 9653 11388 ... 4257279 2600529.0 11388.0 82.0 1653.0 64759.0 31.24 31.19 17.59 2020-10-28T17:00:00
2020-10-29T17:00:00 13 180.689065 33.123883 6763 548 7311 97984 105295 9194 11120 ... 4319598 2639633.0 11120.0 98.0 1828.0 62319.0 -2.35 -3.77 17.84 2020-10-29T17:00:00
2020-10-30T17:00:00 13 180.689065 33.123883 7320 588 7908 107748 115656 10361 12833 ... 4389386 2683935.0 12833.0 82.0 2390.0 69788.0 15.40 11.99 18.39 2020-10-30T17:00:00

5 rows × 25 columns

Andamento Nazionale

In [8]:
# Bar chart of the `totale_casi` column per reporting date; bar colour follows the same value.
fig2 = px.bar(
    data_national,
    x='data',
    y='totale_casi',
    hover_data=['totale_casi'],
    color='totale_casi',
    height=600,
    color_continuous_scale='Sunsetdark',
)
fig2.update_layout(
    title_text='Total COVID19 Cases - Italy',
    xaxis_rangeslider_visible=True,
).update_yaxes(tick0=0, dtick=25000, gridcolor='White')
fig2.show()
In [9]:
# Bar chart of the `totale_positivi` column per reporting date; bar colour follows the same value.
fig22 = px.bar(
    data_national,
    x='data',
    y='totale_positivi',
    hover_data=['totale_positivi'],
    color='totale_positivi',
    height=600,
    color_continuous_scale='Sunsetdark',
)
fig22.update_layout(
    title_text='Active COVID19 Cases - Italy',
    xaxis_rangeslider_visible=True,
).update_yaxes(tick0=0, dtick=10000, gridcolor='White')
fig22.show()

Andamento per zone d'Italia

In [10]:
# One line per macro-area plus the national series: (frame, legend label, line colour).
new_case_series = [
    (cases_Nordovest, "North-West", 'red'),
    (cases_Nordest, "North-East", 'green'),
    (cases_Centro, "Center", 'darkviolet'),
    (cases_Sudisole, "South and Islands", 'darkblue'),
    (data_national, "All Italy", 'deepskyblue'),
]

fig = go.Figure()
for area_frame, legend_name, colour in new_case_series:
    fig.add_trace(go.Scatter(mode="lines+markers", x=area_frame['data'], y=area_frame['new_cases'],
                             name=legend_name, line_color=colour))

fig.update_layout(title_text='Daily Coronavirus new cases - All Italy and Regions',
                  xaxis_rangeslider_visible=True)
fig.show()
In [11]:
# One line per macro-area plus the national series: (frame, legend label, line colour).
new_swab_series = [
    (cases_Nordovest, "North-West", 'red'),
    (cases_Nordest, "North-East", 'green'),
    (cases_Centro, "Center", 'darkviolet'),
    (cases_Sudisole, "South and Islands", 'darkblue'),
    (data_national, "All Italy", 'deepskyblue'),
]

fig = go.Figure()
for area_frame, legend_name, colour in new_swab_series:
    fig.add_trace(go.Scatter(mode="lines+markers", x=area_frame['data'], y=area_frame['new_swabs'],
                             name=legend_name, line_color=colour))

fig.update_layout(title_text='Daily swabs - All Italy and Regions',
                  xaxis_rangeslider_visible=True)
fig.show()
In [12]:
# National daily deaths and recoveries: (column, legend label, line colour).
fig = go.Figure()
for series_col, legend_name, colour in [
    ('new_deaths', "Daily Deaths", 'red'),
    ('new_recovered', "Daily Recovered", 'green'),
]:
    fig.add_trace(go.Scatter(mode="lines+markers", x=data_national['data'], y=data_national[series_col],
                             name=legend_name, line_color=colour))

fig.update_layout(title_text='Daily Coronavirus Deaths and Recoveries - Italy',
                  xaxis_rangeslider_visible=True)
fig.update_yaxes(tick0=0, dtick=500)
fig.show()
In [13]:
# Start from a fresh figure. This cell used to append to the `fig` left over from
# the previous cell, which mixed the deaths/recoveries traces into this chart and
# duplicated traces every time the cell was re-run.
fig = go.Figure()

# National daily cases, swabs and unique people tested: (column, legend label, line colour).
for series_col, legend_name, colour in [
    ('new_cases', "Daily Cases", 'deepskyblue'),
    ('new_swabs', "Daily swabs", 'purple'),
    ('new_unique_tested', "Daily unique tested", 'red'),
]:
    fig.add_trace(go.Scatter(mode="lines+markers", x=data_national['data'], y=data_national[series_col],
                             name=legend_name, line_color=colour))

fig.update_layout(title_text='Daily Coronavirus new cases and swabs - Italy',
                  xaxis_rangeslider_visible=True)
fig.update_yaxes(tick0=0, dtick=10000)
fig.show()
In [14]:
# National detection ratios, per swab and per unique person tested:
# (column, legend label, line colour).
fig3 = go.Figure()
for series_col, legend_name, colour in [
    ('detect_ratio_swabs', "Daily detect ratio - Italy", 'purple'),
    ('detect_ratio_cases', "Daily unique detect ratio - Italy", 'red'),
]:
    fig3.add_trace(go.Scatter(mode="lines+markers", x=data_national['data'], y=data_national[series_col],
                              name=legend_name, line_color=colour))

fig3.update_layout(title_text="Daily Swabs detect ratio - Italy",
                   xaxis_rangeslider_visible=True)
fig3.update_yaxes(dtick=5)
# Bare expression so the notebook renders the figure as the cell output.
fig3
In [15]:
# National `totale_ospedalizzati` series (intensive care is plotted separately in fig5).
fig4 = go.Figure(
    data=[go.Scatter(mode="lines+markers", x=data_national['data'], y=data_national['totale_ospedalizzati'],
                     name="Daily total Hospital - Italy", line_color='green')]
)
fig4.update_layout(title_text="Daily Total Hospital - Italy",
                   xaxis_rangeslider_visible=True)
fig4.update_yaxes(dtick=2000)
In [16]:
# National `terapia_intensiva` series.
fig5 = go.Figure(
    data=[go.Scatter(mode="lines+markers", x=data_national['data'], y=data_national['terapia_intensiva'],
                     name="Daily total UTI - Italy", line_color='blue')]
)
fig5.update_layout(title_text="Daily Total UTI - Italy",
                   xaxis_rangeslider_visible=True)
fig5.update_yaxes(dtick=200)
In [17]:
# Day-over-day percentage change of national new cases and swabs:
# (column, legend label, line colour).
fig6 = go.Figure()
for series_col, legend_name, colour in [
    ('daily_cases_perc_change', "Daily cases percentual change - Italy", 'purple'),
    ('daily_swab_perc_change', "Daily swab percentual change - Italy", 'red'),
]:
    fig6.add_trace(go.Scatter(mode="lines+markers", x=data_national['data'], y=data_national[series_col],
                              name=legend_name, line_color=colour))

# The title used to read "Daily v- Italy" (truncated); it now names what the traces show.
fig6.update_layout(title_text="Daily percentage change - Italy",
                   xaxis_rangeslider_visible=True)
fig6.update_yaxes(dtick=40)
In [18]:
# National daily recovered, deaths and new cases on one chart:
# (column, legend label, line colour).
fig7 = go.Figure()
for series_col, legend_name, colour in [
    ('new_recovered', "Daily new recovered - Italy", 'purple'),
    ('new_deaths', "Daily new deaths - Italy", 'red'),
    ('new_cases', "Daily new cases - Italy", 'green'),
]:
    fig7.add_trace(go.Scatter(mode="lines+markers", x=data_national['data'], y=data_national[series_col],
                              name=legend_name, line_color=colour))

fig7.update_layout(title_text="Daily change - Italy",
                   xaxis_rangeslider_visible=True)
fig7.update_yaxes(dtick=500)
In [19]:
# National `terapia_intensiva` against daily new deaths:
# (column, legend label, line colour).
fig8 = go.Figure()
for series_col, legend_name, colour in [
    ('terapia_intensiva', "Daily total UTI - Italy", 'purple'),
    ('new_deaths', "Daily new deaths - Italy", 'red'),
]:
    fig8.add_trace(go.Scatter(mode="lines+markers", x=data_national['data'], y=data_national[series_col],
                              name=legend_name, line_color=colour))

fig8.update_layout(title_text="Daily UTI vs  Death - Italy",
                   xaxis_rangeslider_visible=True)
fig8.update_yaxes(dtick=200)

Andamento Provincia di Genova

In [20]:
# Province-level rows whose `sigla_provincia` is 'GE'.
data_ge = data_province[data_province['sigla_provincia'] == 'GE']

# NOTE(review): the plotted column is `totale_casi` while the labels say
# "Daily cases" — confirm which of the two is intended.
fig9 = go.Figure(
    data=[go.Scatter(mode="lines+markers", x=data_ge['data'], y=data_ge['totale_casi'],
                     name="Daily cases GE - Italy", line_color='red')]
)
fig9.update_layout(title_text="Daily cases GE - Italy",
                   xaxis_rangeslider_visible=True)
fig9.update_yaxes(dtick=1000)

Andamento Regionale

In [21]:
# NOTE(review): the plotted column is `totale_casi` while the labels say
# "Daily cases" — confirm which of the two is intended.
fig10 = go.Figure(
    data=[go.Scatter(mode="lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['totale_casi'],
                     name="Daily cases Liguria - Italy", line_color='red')]
)
fig10.update_layout(title_text="Daily cases Liguria - Italy",
                    xaxis_rangeslider_visible=True)
fig10.update_yaxes(dtick=1000)
In [22]:
# Liguria series shown together: (column, legend label, line colour).
# The `new_swabs` series is intentionally not plotted here.
liguria_daily_series = [
    ('terapia_intensiva', "Daily UTI Liguria - Italy", 'red'),
    ('ricoverati_con_sintomi', "Daily hospital Liguria - Italy", 'purple'),
    ('new_deaths', "Daily new deaths Liguria - Italy", 'green'),
    ('nuovi_positivi', "Daily new cases Liguria - Italy", 'blue'),
    ('new_recovered', "Daily new recovered Liguria - Italy", 'yellow'),
]

fig11 = go.Figure()
for series_col, legend_name, colour in liguria_daily_series:
    fig11.add_trace(go.Scatter(mode="lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria[series_col],
                               name=legend_name, line_color=colour))

fig11.update_layout(title_text="Daily change Liguria - Italy",
                    xaxis_rangeslider_visible=True)
fig11.update_yaxes(dtick=100)
In [23]:
# Liguria detection ratio (`detect_ratio` column) as a single line.
fig12 = go.Figure(
    data=[go.Scatter(mode="lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['detect_ratio'],
                     name="Daily detect  ratio Liguria - Italy", line_color='green')]
)
fig12.update_layout(title_text="Daily detect ratio Liguria - Italy",
                    xaxis_rangeslider_visible=True)
fig12.update_yaxes(dtick=20)
In [24]:
# Day-over-day percentage change of Liguria new cases and swabs:
# (column, legend label, line colour).
fig13 = go.Figure()
for series_col, legend_name, colour in [
    ('daily_cases_perc_change', "Daily cases perc change Liguria - Italy", 'red'),
    ('daily_swab_perc_change', "Daily swab perc change Liguria - Italy", 'purple'),
]:
    fig13.add_trace(go.Scatter(mode="lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria[series_col],
                               name=legend_name, line_color=colour))

fig13.update_layout(title_text="Daily percentual change Liguria - Italy",
                    xaxis_rangeslider_visible=True)
fig13.update_yaxes(dtick=100)
In [25]:
#print(data_national.dtypes)
In [26]:
import pandas as pd
import numpy as np
import itertools
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, acf, pacf,arma_order_select_ic
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARIMA
import warnings
#Librerie di base
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import pyplot
from matplotlib.pyplot import figure
import plotly.tools as tls
import math
import statistics as st
import seaborn as sns 
from io import StringIO
import plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import pylab as pl
import scipy.stats as scs
from itertools import product                    # some useful functions
from tqdm import tqdm_notebook
import time
import timeit
import pytest
import os
import pyarrow

#Pacchetto Sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle
from sklearn.svm import SVC

#Per Modello XGBoost
import xgboost as xgb
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt 

import category_encoders as ce
warnings.simplefilter('ignore')
In [27]:
# Modelling frame: reporting date plus daily new cases (renamed to `new`), with
# the rows that contain missing values dropped (the first row has no increment).
# `data` stays a regular column and the integer index is kept: a bare
# `ds.set_index('data')` would return a new frame without changing `ds`.
ds = (
    data_national[['data', 'new_cases']]
    .rename(columns={'new_cases': 'new'})
    .dropna()
)
In [28]:
# Fit an ARIMA model of order (2, 2, 2) on the daily new-case series and print its summary.
model = ARIMA(ds['new'], order=(2,2,2))
model_fit = model.fit(disp=0)
print(model_fit.summary())

# Residual diagnostics: line plot first, then kernel density estimate, then summary statistics.
residuals = pd.DataFrame(model_fit.resid)
for residual_plot_kind in ('line', 'kde'):
    residuals.plot(kind=residual_plot_kind)
    pyplot.show()
print(residuals.describe())
                             ARIMA Model Results                              
==============================================================================
Dep. Variable:                 D2.new   No. Observations:                  247
Model:                 ARIMA(2, 2, 2)   Log Likelihood               -1948.600
Method:                       css-mle   S.D. of innovations            639.562
Date:                Fri, 30 Oct 2020   AIC                           3909.199
Time:                        21:08:11   BIC                           3930.256
Sample:                             2   HQIC                          3917.677
                                                                              
================================================================================
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
const            8.3306      7.379      1.129      0.259      -6.133      22.794
ar.L1.D2.new     0.5939      0.071      8.415      0.000       0.456       0.732
ar.L2.D2.new    -0.2324      0.074     -3.160      0.002      -0.377      -0.088
ma.L1.D2.new    -1.8117      0.033    -54.226      0.000      -1.877      -1.746
ma.L2.D2.new     0.9276      0.029     31.800      0.000       0.870       0.985
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
AR.1            1.2778           -1.6342j            2.0745           -0.1444
AR.2            1.2778           +1.6342j            2.0745            0.1444
MA.1            0.9765           -0.3528j            1.0383           -0.0552
MA.2            0.9765           +0.3528j            1.0383            0.0552
-----------------------------------------------------------------------------
                 0
count   247.000000
mean      3.678974
std     642.351604
min   -5231.558122
25%    -154.694386
50%     -22.480699
75%     161.449839
max    3119.266537
In [29]:
# Ask the fitted model for a 10-step forecast and keep element [0] of what
# forecast() returns — presumably the point forecasts; confirm against the
# statsmodels version in use.
forecast = model_fit.forecast(steps=10)[0]
print(forecast)
[31725.81681051 32571.83960516 34378.77222713 36715.37853789
 39148.56400024 41521.34008522 43841.11815559 46148.77975266
 48466.88053217 50799.31551869]
In [30]:
# Walk-forward validation: refit ARIMA(2,2,2) on all data seen so far, predict one
# step ahead, then reveal the true value and move on to the next test point.
X = ds['new'].values
# 0.999 keeps almost everything for training; in the recorded run this left a
# single observation in the test set.
size = int(len(X) * 0.999)
train, test = X[0:size], X[size:len(X)]
history = list(train)
predictions = list()
for t in range(len(test)):
    model = ARIMA(history, order=(2,2,2))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    # Take the one-step forecast as a plain float. Formatting or scoring a
    # 1-element array relies on implicit array-to-scalar conversion, which
    # newer NumPy versions deprecate.
    yhat = float(np.ravel(output[0])[0])
    predictions.append(yhat)
    obs = test[t]
    history.append(obs)
    print('predicted=%f, expected=%f' % (yhat, obs))
error = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % error)
# Plot observed test values against the one-step predictions (red).
pyplot.plot(test)
pyplot.plot(predictions, color='red')
pyplot.show()
predicted=27847.267348, expected=31079.000000
Test MSE: 10444095.934
In [31]:
# 10-step forecast from the current `model_fit`.
# NOTE(review): at this point `model_fit` is whatever the walk-forward loop in the
# previous cell fitted last, i.e. a model trained on `history` *before* the final
# test observation was appended. The first forecast value therefore appears to
# correspond to that already-observed point rather than to the next unseen day —
# confirm whether a refit on the full series was intended here.
forecast = model_fit.forecast(steps=10)[0]
print(forecast)
[27847.26734803 29107.02188469 30714.02643825 32455.00409498
 34185.49711473 35880.35491561 37562.58232868 39251.26746423
 40951.05446208 42659.63106451]
In [32]:
# Disabled ARIMA order grid search. The code below is wrapped in a bare string
# literal, so none of it is executed; the notebook only echoes the text as the
# cell output. Remove the surrounding triple quotes to run it.
'''
import warnings
from pandas import read_csv
from pandas import datetime
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error

# evaluate an ARIMA model for a given order (p,d,q)
def evaluate_arima_model(X, arima_order):
	# prepare training dataset
	train_size = int(len(X) * 0.90)
	train, test = X[0:train_size], X[train_size:]
	history = [x for x in train]
	# make predictions
	predictions = list()
	for t in range(len(test)):
		model = ARIMA(history, order=arima_order)
		model_fit = model.fit(disp=0)
		yhat = model_fit.forecast()[0]
		predictions.append(yhat)
		history.append(test[t])
	# calculate out of sample error
	error = mean_squared_error(test, predictions)
	return error

# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(dataset, p_values, d_values, q_values):
	#dataset = dataset.astype('float32')
	best_score, best_cfg = float("inf"), None
	for p in p_values:
		for d in d_values:
			for q in q_values:
				order = (p,d,q)
				try:
					mse = evaluate_arima_model(dataset, order)
					if mse < best_score:
						best_score, best_cfg = mse, order
					print('ARIMA%s MSE=%.3f' % (order,mse))
				except:
					continue
	print('Best ARIMA%s MSE=%.3f' % (best_cfg, best_score))

# load dataset


# evaluate parameters
p_values = [0, 1, 2, 4, 5, 6, 8, 10]
d_values = range(0, 5)
q_values = range(0, 5)
#warnings.filterwarnings("ignore")
evaluate_models(ds['new'].values, p_values, d_values, q_values)
'''
Out[32]:
'\nimport warnings\nfrom pandas import read_csv\nfrom pandas import datetime\nfrom statsmodels.tsa.arima_model import ARIMA\nfrom sklearn.metrics import mean_squared_error\n\n# evaluate an ARIMA model for a given order (p,d,q)\ndef evaluate_arima_model(X, arima_order):\n\t# prepare training dataset\n\ttrain_size = int(len(X) * 0.90)\n\ttrain, test = X[0:train_size], X[train_size:]\n\thistory = [x for x in train]\n\t# make predictions\n\tpredictions = list()\n\tfor t in range(len(test)):\n\t\tmodel = ARIMA(history, order=arima_order)\n\t\tmodel_fit = model.fit(disp=0)\n\t\tyhat = model_fit.forecast()[0]\n\t\tpredictions.append(yhat)\n\t\thistory.append(test[t])\n\t# calculate out of sample error\n\terror = mean_squared_error(test, predictions)\n\treturn error\n\n# evaluate combinations of p, d and q values for an ARIMA model\ndef evaluate_models(dataset, p_values, d_values, q_values):\n\t#dataset = dataset.astype(\'float32\')\n\tbest_score, best_cfg = float("inf"), None\n\tfor p in p_values:\n\t\tfor d in d_values:\n\t\t\tfor q in q_values:\n\t\t\t\torder = (p,d,q)\n\t\t\t\ttry:\n\t\t\t\t\tmse = evaluate_arima_model(dataset, order)\n\t\t\t\t\tif mse < best_score:\n\t\t\t\t\t\tbest_score, best_cfg = mse, order\n\t\t\t\t\tprint(\'ARIMA%s MSE=%.3f\' % (order,mse))\n\t\t\t\texcept:\n\t\t\t\t\tcontinue\n\tprint(\'Best ARIMA%s MSE=%.3f\' % (best_cfg, best_score))\n\n# load dataset\n\n\n# evaluate parameters\np_values = [0, 1, 2, 4, 5, 6, 8, 10]\nd_values = range(0, 5)\nq_values = range(0, 5)\n#warnings.filterwarnings("ignore")\nevaluate_models(ds[\'new\'].values, p_values, d_values, q_values)\n'
In [ ]:
 
In [ ]: